# Load the cleaned collisions data from disk (expected to provide the
# `full_collisions` data frame) and print a column-by-column overview.
load(file = "~/Documents/Nonparametric Statisics/Project/clean data/full_collisions.RData")
glimpse(x = full_collisions)
## Rows: 2,585,717
## Columns: 37
## $ accident_index <chr> "200501BS00001", "200501BS…
## $ accident_year <dbl> 2005, 2005, 2005, 2005, 20…
## $ accident_reference <chr> "01BS00001", "01BS00002", …
## $ location_easting_osgr <dbl> 525680, 524170, 524520, 52…
## $ location_northing_osgr <dbl> 178240, 181650, 182240, 17…
## $ longitude <dbl> -0.191170, -0.211708, -0.2…
## $ latitude <dbl> 51.48910, 51.52007, 51.525…
## $ police_force <fct> Metropolitan Police, Metro…
## $ accident_severity <fct> Serious, Slight, Slight, S…
## $ number_of_vehicles <dbl> 1, 1, 2, 1, 1, 2, 2, 1, 2,…
## $ number_of_casualties <dbl> 1, 1, 1, 1, 1, 1, 1, 2, 2,…
## $ date <date> 2005-01-04, 2005-01-05, 2…
## $ day_of_week <fct> Tuesday, Wednesday, Thursd…
## $ time <time> 17:42:00, 17:36:00, 00:15…
## $ local_authority_district <fct> "Kensington and Chelsea", …
## $ local_authority_ons_district <chr> "E09000020", "E09000020", …
## $ local_authority_highway <chr> "E09000020", "E09000020", …
## $ first_road_class <fct> A, B, C, A, Unclassified, …
## $ first_road_number <dbl> 3218, 450, 0, 3220, 0, 0, …
## $ road_type <fct> Single carriageway, Dual c…
## $ speed_limit <dbl> 30, 30, 30, 30, 30, 30, 30…
## $ junction_detail <fct> Not at junction or within …
## $ junction_control <fct> Data missing or out of ran…
## $ second_road_class <fct> Not at junction or within …
## $ second_road_number <dbl> -1, 0, -1, -1, -1, -1, 0, …
## $ pedestrian_crossing_human_control <fct> None within 50 metres, Non…
## $ pedestrian_crossing_physical_facilities <fct> "Zebra", "Pedestrian phase…
## $ light_conditions <fct> Daylight, Darkness - light…
## $ weather_conditions <fct> Raining no high winds, Fin…
## $ road_surface_conditions <fct> Wet or damp, Dry, Dry, Dry…
## $ special_conditions_at_site <fct> None, None, None, None, No…
## $ carriageway_hazards <fct> None, None, None, None, No…
## $ urban_or_rural_area <fct> Urban, Urban, Urban, Urban…
## $ did_police_officer_attend_scene_of_accident <fct> Yes, Yes, Yes, Yes, Yes, Y…
## $ trunk_road_flag <fct> Non-trunk, Non-trunk, Non-…
## $ lsoa_of_accident_location <chr> "E01002849", "E01002909", …
## $ datetime <dttm> 2005-01-04 17:42:00, 2005…
Exploratory data analysis (EDA) with a focus on accident severity:
# Number of accidents in each severity level.
ggplot(full_collisions, aes(accident_severity)) +
  geom_bar()
## year
# Accident counts per year, with each bar stacked by severity.
ggplot(full_collisions, aes(accident_year)) +
  geom_bar(aes(fill = accident_severity))
Normalising each bar to the same length to compare the severity proportions:
# Severity shares per year: position = "fill" rescales every bar to the same
# total. The year is mapped to `y`, so the bars are drawn horizontally.
ggplot(full_collisions, aes(y = accident_year, fill = accident_severity)) +
  geom_bar(position = "fill")
The severity composition seems to change over the years, with the share of serious accidents apparently growing relative to slight ones.
# Frequency polygons of the time of day (48 bins), one line per severity.
ggplot(full_collisions, aes(time, color = accident_severity)) +
  geom_freqpoly(bins = 48)
# Same plot restricted to fatal accidents, whose curve is hard to read on the
# shared count axis.
full_collisions %>%
  filter(accident_severity == "Fatal") %>%
  ggplot(aes(time, color = accident_severity)) +
  geom_freqpoly(bins = 48)
Fatal accidents follow the same time-of-day pattern.
# Severity shares by time of day: a 48-bin histogram where position = "fill"
# rescales every bin to the same height.
ggplot(full_collisions, aes(x = time, fill = accident_severity)) +
  geom_histogram(bins = 48, position = "fill")
This could be interesting as well: severity increases at night.
# Re-level the weekday factor so the bars run Monday..Sunday.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the result.
full_collisions$day_of_week <- factor(
  full_collisions$day_of_week,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  ),
  ordered = TRUE
)
# Accident counts per weekday, stacked by severity.
full_collisions %>%
  ggplot(aes(day_of_week)) +
  geom_bar(aes(fill = accident_severity))
# Severity shares per weekday (every bar rescaled to the same height).
full_collisions %>%
  ggplot(aes(x = day_of_week, fill = accident_severity)) +
  geom_bar(position = "fill")
The severity mix changes across the days of the week.
# Accident counts by day of the year (366 bins), one line per accident year.
# Only the derived `day` column is needed; the legend is hidden because there
# is one colour per year.
full_collisions %>%
  mutate(day = yday(date)) %>%
  ggplot(aes(day, color = as.factor(accident_year))) +
  geom_freqpoly(bins = 366) +
  theme(legend.position = "none")
We can see the COVID years:
More clearly, restricting the plot to 2019 onwards:
# Accident counts by day of the year for accident years after 2018, one line
# per year, with the legend shown on top to identify the years.
# Only the derived `day` column is needed for the plot.
full_collisions %>%
  filter(accident_year > 2018) %>%
  mutate(day = yday(date)) %>%
  ggplot(aes(day, color = as.factor(accident_year))) +
  geom_freqpoly(bins = 366) +
  theme(legend.position = "top")
We can also look for a trend across the days of the month:
# Accident counts by day of the month (31 bins), one line per accident year.
# Only the derived `day` column is needed for the plot.
full_collisions %>%
  mutate(day = mday(date)) %>%
  ggplot(aes(day, color = as.factor(accident_year))) +
  geom_freqpoly(bins = 31) +
  theme(legend.position = "none")
No trend is visible.
# Number of vehicles / casualties per accident, restricted to 1..10.
# The x limits are padded by 0.5 on each side: a bar is centred on its value
# and extends to both sides of it, so limits of exactly 1 and 10 cut through
# the outermost bars and ggplot2 drops them ("Removed 6 rows containing
# missing values (`geom_bar()`)"), hiding the most common category.
# Observations outside the limits are still removed before counting, hence the
# remaining `stat_count()` warnings.

# Accident counts by number of vehicles, stacked by severity.
full_collisions %>%
  ggplot(aes(number_of_vehicles)) +
  geom_bar(aes(fill = accident_severity)) +
  xlim(c(0.5, 10.5))
## Warning: Removed 142 rows containing non-finite values (`stat_count()`).

# Severity shares by number of vehicles.
full_collisions %>%
  ggplot(aes(x = number_of_vehicles, fill = accident_severity)) +
  geom_bar(position = "fill") +
  xlim(c(0.5, 10.5))
## Warning: Removed 142 rows containing non-finite values (`stat_count()`).

# Accident counts by number of casualties, stacked by severity.
full_collisions %>%
  ggplot(aes(number_of_casualties)) +
  geom_bar(aes(fill = accident_severity)) +
  xlim(c(0.5, 10.5))
## Warning: Removed 529 rows containing non-finite values (`stat_count()`).

# Severity shares by number of casualties.
full_collisions %>%
  ggplot(aes(x = number_of_casualties, fill = accident_severity)) +
  geom_bar(position = "fill") +
  xlim(c(0.5, 10.5))
## Warning: Removed 529 rows containing non-finite values (`stat_count()`).
# First road class: stacked counts, then severity shares.
ggplot(full_collisions, aes(first_road_class)) +
  geom_bar(aes(fill = accident_severity))
ggplot(full_collisions, aes(x = first_road_class, fill = accident_severity)) +
  geom_bar(position = "fill")
# Second road class: same pair of plots, with the axis labels rotated by
# 90 degrees.
ggplot(full_collisions, aes(second_road_class)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 90))
ggplot(full_collisions, aes(x = second_road_class, fill = accident_severity)) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 90))
# Road type: stacked counts, then severity shares (labels rotated by
# 90 degrees).
ggplot(full_collisions, aes(road_type)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 90))
ggplot(full_collisions, aes(x = road_type, fill = accident_severity)) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 90))
# Speed limit is numeric; converting it to a factor gives one bar per distinct
# limit.
ggplot(full_collisions, aes(as.factor(speed_limit))) +
  geom_bar(aes(fill = accident_severity))
ggplot(full_collisions, aes(x = as.factor(speed_limit), fill = accident_severity)) +
  geom_bar(position = "fill")
# Junction detail: stacked counts, then severity shares (labels rotated by
# 90 degrees).
ggplot(full_collisions, aes(junction_detail)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 90))
ggplot(full_collisions, aes(x = junction_detail, fill = accident_severity)) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 90))
# Junction control: same pair of plots.
ggplot(full_collisions, aes(junction_control)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 90))
ggplot(full_collisions, aes(x = junction_control, fill = accident_severity)) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 90))
# Pedestrian crossing (human control): stacked counts (p1) next to severity
# shares (p2). Legends are hidden; labels are rotated by 60 degrees.
p1 <- ggplot(full_collisions, aes(pedestrian_crossing_human_control)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p2 <- ggplot(
  full_collisions,
  aes(x = pedestrian_crossing_human_control, fill = accident_severity)
) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
# Combine the two plots into a single figure.
p1 + p2
# Pedestrian crossing (physical facilities): stacked counts (p1) next to
# severity shares (p2). This pair plots `pedestrian_crossing_physical_facilities`
# rather than repeating the human-control plots.
# Legends are hidden; labels are rotated by 60 degrees.
p1 <- full_collisions %>%
  ggplot(aes(pedestrian_crossing_physical_facilities)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p2 <- full_collisions %>%
  ggplot(aes(x = pedestrian_crossing_physical_facilities, fill = accident_severity)) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
# Combine the two plots into a single figure.
p1 + p2
# Light conditions: stacked counts (p1) next to severity shares (p2).
# Legends are hidden; labels are rotated by 60 degrees.
p1 <- ggplot(full_collisions, aes(light_conditions)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p2 <- ggplot(full_collisions, aes(x = light_conditions, fill = accident_severity)) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p1 + p2
# Weather conditions: same pair of plots.
p1 <- ggplot(full_collisions, aes(weather_conditions)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p2 <- ggplot(full_collisions, aes(x = weather_conditions, fill = accident_severity)) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p1 + p2
# Special conditions at site: stacked counts (p1) next to severity shares (p2).
# Legends are hidden; labels are rotated by 60 degrees.
p1 <- ggplot(full_collisions, aes(special_conditions_at_site)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p2 <- ggplot(
  full_collisions,
  aes(x = special_conditions_at_site, fill = accident_severity)
) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p1 + p2
# Carriageway hazards: same pair of plots.
p1 <- ggplot(full_collisions, aes(carriageway_hazards)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p2 <- ggplot(full_collisions, aes(x = carriageway_hazards, fill = accident_severity)) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 60)) +
  theme(legend.position = "none")
p1 + p2
# Police attendance at the scene: stacked counts (p1) next to severity shares
# (p2). Legends are hidden; labels are rotated by 45 degrees.
p1 <- ggplot(full_collisions, aes(did_police_officer_attend_scene_of_accident)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  theme(legend.position = "none")
p2 <- ggplot(
  full_collisions,
  aes(x = did_police_officer_attend_scene_of_accident, fill = accident_severity)
) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  theme(legend.position = "none")
p1 + p2
# Trunk road flag: same pair of plots.
p1 <- ggplot(full_collisions, aes(trunk_road_flag)) +
  geom_bar(aes(fill = accident_severity)) +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  theme(legend.position = "none")
p2 <- ggplot(full_collisions, aes(x = trunk_road_flag, fill = accident_severity)) +
  geom_bar(position = "fill") +
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  theme(legend.position = "none")
p1 + p2